# -*- coding: utf-8 -*-

"""
This file includes several common utilities for preprocessing data.
"""

from dateparser import parse



def isfloat(value):
    """Judge whether the input can be converted by ``float()``.

    Useful when a column mixes text with numbers or NaN (e.g. values coming
    out of pandas), where calling ``float(value)`` directly would raise.
    Note that NaN itself converts fine, so it counts as a float here.

    Args:
        value: any object to test

    Returns:
        bool: True if ``float(value)`` succeeds, False if it raises
        TypeError, ValueError or OverflowError

    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a float". A bare ``except``
        # would also swallow KeyboardInterrupt / SystemExit.
        return False
    return True


def judString(target_str, terms):
    """Report whether at least one entry of terms occurs in target_str.

    Args:
        target_str (str): text to search in
        terms (list of str): candidate words, each tested with ``in``
    Returns:
        bool: True once any term is found, False if none is
        (including when terms is empty)
    """
    return any(term in target_str for term in terms)


def judCount(target_str, terms):
    """Count how many entries of terms appear in target_str.

    Each entry of terms contributes at most 1, no matter how many times it
    occurs in target_str; a term listed twice is counted twice.

    Args:
        target_str (str): text to search in
        terms (list of str): candidate words, each tested with ``in``
    Returns:
        int: number of terms found in target_str
    """
    return sum(1 for term in terms if term in target_str)


def judDict(d, terms):
    """Report whether at least one entry of terms is a key of d.

    Args:
        d (dict): dictionary whose keys are checked
        terms (list of str): candidate keys, each tested with ``in``
    Returns:
        bool: True once any term is found in d, False if none is
        (including when terms is empty)

    """
    return any(key in d for key in terms)



## first, clean dateparser
def cleanDate(s, year=2017):
    """Rewrite a month/day date such as 6月30日 into ``<year>-6-30``.

    Anything that ``isfloat`` accepts (numbers, NaN, numeric strings) and
    any string without the month marker is returned unchanged.

    Args:
        s: raw date value; expected to be a text string whenever
            ``isfloat`` rejects it
        year (int or str): year prefixed to the rewritten date. Defaults
            to 2017, the value that used to be hard-coded.
    Returns:
        s unchanged, or the rewritten ``<year>-<month>-<day>`` string
    """
    # u'' literals instead of '...'.decode('utf-8'): ``str`` has no
    # ``.decode`` on Python 3, so the old form raised AttributeError there.
    # On Python 2 both spellings produce the same unicode object.
    month_mark = u'月'
    day_mark = u'日'

    if isfloat(s):
        return s

    if month_mark in s:
        s = s.replace(month_mark, u"-").replace(day_mark, u"")
        s = u"{0}-".format(year) + s

    return s

########### monthly counts for plot ( plotk.r )
def parseDate(s):
    """Parse s with ``dateparser.parse`` and return the leading date part.

    Args:
        s: raw date value handed to ``parse``
    Returns:
        str: the first 10 characters of ``str(parse(s))`` (``YYYY-MM-DD``
        when ``parse`` yields a datetime), or "" when ``parse`` raises or
        returns None
    """
    try:
        parsed = parse(s)
    except Exception:
        # Deliberate best-effort: any value the parser chokes on (NaN,
        # non-string objects, ...) is mapped to an empty date.
        return ""

    # ``parse`` signals "could not parse" by returning None; without this
    # check str(None)[:10] would leak the literal text "None" as a date.
    if parsed is None:
        return ""

    return str(parsed)[:10]